library(ggplot2)
library(dplyr)
library(haven)

# Root folder of the replication package -- EDIT THIS for your machine.
# Keep the trailing slash: sub-paths below are appended with paste0().
path <- "/Users/ben_elsner/github repos/pollutionvoting/replication_pack/"

# Read the Stata (.dta) input file from the tempdata sub-folder.
filename <- paste0(path, "tempdata/kreise_elections_collapsed.dta")
df <- read_dta(file = filename)

# Prepare the plotting data:
#   1. drop every row whose electionname mentions Hamburg, Berlin or Bremen;
#   2. compute `diff`, the gap between Incumbent_share_weight and
#      share_woPM_IV1;
#   3. build `election_order`, a factor of the election names whose levels
#      follow ascending `diff`, so the plot's y axis is sorted by that gap.
df <- df %>%
  filter(!grepl("Hamburg|Berlin|Bremen", electionname)) %>%
  mutate(
    diff = Incumbent_share_weight - share_woPM_IV1,
    # unique() keeps the first (smallest-diff) occurrence of each name.
    # Without it factor() stops with a "factor level is duplicated" error as
    # soon as two rows share an electionname; with unique names the result is
    # unchanged.
    election_order = factor(
      electionname,
      levels = unique(electionname[order(diff)])
    )
  )



# Lollipop chart: one row per level of `election_order`, a stem running from
# zero to that row's `diff`, and a dot at the tip. A red vertical line marks
# zero.
# NOTE(review): `size` on line geoms is deprecated since ggplot2 3.4.0 in
# favour of `linewidth`; it is kept so the figure renders identically on
# older ggplot2 installations.
p <- ggplot(df, aes(x = diff, y = election_order)) +
  # Stems: x and y for the tip are inherited; only the start (x = 0) and the
  # end coordinates are set here.
  geom_segment(
    aes(x = 0, xend = diff, yend = election_order),
    colour = "blue"
  ) +
  # Dots reuse the x/y mapping declared in ggplot().
  geom_point(size = 3, colour = "blue") +
  geom_vline(xintercept = 0, colour = "red", size = 1) +
  labs(x = "Predicted Difference Incumbent Share", y = "") +
  theme_bw() +
  theme(
    # No grid lines, major or minor
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Base font size, with a slightly larger x-axis title
    text = element_text(size = 12),
    axis.title.x = element_text(size = 14),
    # 1 cm margin on every side
    plot.margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "cm")
  )

# Draw the figure on the active graphics device
print(p)

# Write the same figure to outputs/figure_S14.png under `path`
# (8.27 x 11.69 in, i.e. A4 portrait, at 300 dpi)
outputfile <- paste0(path, "outputs/figure_S14.png")
ggsave(
  filename = outputfile,
  plot = p,
  width = 8.27,
  height = 11.69,
  units = "in",
  dpi = 300
)


# Number of rows whose `diff` lies strictly outside the [-1, 1] interval
# (rows with a missing `diff` are dropped by filter() and so not counted)
count <- nrow(filter(df, diff < -1 | diff > 1))
print(count)

# Same count for the wider [-2, 2] interval
count2 <- nrow(filter(df, diff < -2 | diff > 2))
print(count2)

